# Scrape one page of CNKI search results for a fixed keyword and export the
# title, authors, journal, keywords, abstract, and link of each hit to Excel.
import requests
import re
import xlwt
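# Search endpoint plus a desktop User-Agent so the POST looks like a normal browser request.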
url='https://search.cnki.com.cn/Search/ListResult'
headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'
    }
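# Form fields expected by the ListResult endpoint; Theme carries the search keyword
# and most of the remaining fields are left blank.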
data = {
        'searchType': 'MulityTermsSearch',
        'ArticleType': '',
        'ReSearch': '',
        'ParamIsNullOrEmpty': 'false',
        'Islegal': 'false',
        'Content': '',
        'Theme': '复变函数教学',  # search keyword (teaching of complex analysis)
        'Title': '',
        'KeyWd': '',
        'Author': '',
        'SearchFund': '',
        'Originate': '',
        'Summary': '',
        'PublishTimeBegin': '',
        'PublishTimeEnd': '',
        'MapNumber': '',
        'Name': '',
        'Issn': '',
        'Cn': '',
        'Unit': '',
        'Public': '',
        'Boss': '',
        'FirstBoss': '',
        'Catalog': '',
        'Reference': '',
        'Speciality': '',
        'Type': '',
        'Subject': '',
        'SpecialityCode': '',
        'UnitCode': '',
        'Year': '',
        'AcefuthorFilter': '',
        'BossCode': '',
        'Fund': '',
        'Level': '',
        'Elite': '',
        'Organization': '',
        'Order': '1',
        'Page': '6',  # page of results to crawl
        'PageIndex': '',
        'ExcludeField': '',
        'ZtCode': '',
        'Smarts': '',
        'trendTimeBegin': '',
        'trendTimeEnd': '',
        'isBig': 'false',
        'source': '4',
        'trendType': '0'
}
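# Submit the search and pull each field out of the returned HTML with regular expressions.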
r = requests.post(url=url, data=data, headers=headers)
title = re.findall('target="_blank" title="(.*?)"', r.text)  # document titles
author_periodical = re.findall('<span title="(.*?)"', r.text)  # alternating author / journal spans
author = author_periodical[::2]  # authors
periodical = author_periodical[1::2]  # journal names
keyword = re.findall('关键词：.*?\n.*?<a data-key="(.*?)"', r.text)  # first keyword of each hit
pageuiorigin = re.findall('<a href="(.*?) target="_blank" class="left" data-ztcode', r.text)  # raw detail-page hrefs
pageui = []  # detail-page links (scheme stripped)
for i in range(len(title)):
    pageui.append(pageuiorigin[i].split('//')[1].split('"')[0])

abstract = []  # abstracts scraped from each detail page
for link in pageui:
    detail_html = requests.get("https://" + link).text
    match = re.search('【摘要】：</strong></font>(.*?)</div>', detail_html)
    abstract.append(match.group(1) if match else '')
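# All fields collected; write one row per document into a single worksheet.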

result = xlwt.Workbook(encoding='utf-8', style_compression=0)
mysheet = result.add_sheet('爬取结果', cell_overwrite_ok=True)
col = ('文献名', '作者', '期刊', '关键词', '摘要', '链接')  # title, author, journal, keywords, abstract, link
for i in range(len(col)):
    mysheet.write(0, i, col[i])
for t in range(1, len(title) + 1):
    mysheet.write(t, 0, str(title[t - 1]))
    mysheet.write(t, 1, str(author[t - 1]))
    mysheet.write(t, 2, str(periodical[t - 1]))
    mysheet.write(t, 3, str(keyword[t - 1]))
    mysheet.write(t, 4, str(abstract[t - 1]))
    mysheet.write(t, 5, str(pageui[t - 1]))
result.save('cnki.xls')  # xlwt produces the legacy .xls (BIFF) format, so keep the .xls extension
